Making interactive choropleths for disease outbreaks data using leaflet in R
In these notes, you’ll learn how to load the data from the GitHub repository of the disease outbreaks project and use the leaflet library in R to create a choropleth map showing the geographic distribution of disease outbreaks from 1996 to September 30, 2024.
The final output will look like this:
Step 1: Load libraries
The following packages are required to replicate the choropleth map.
library(lubridate)
library(stringi)
library(httr)
library(jsonlite)
library(dplyr)
library(tidyr)
library(leaflet)
library(RColorBrewer)
library(htmltools)
library(sf)
library(bslib)
library(htmlwidgets)Step 2: Load data from GitHub
Now, let’s fetch the latest disease outbreaks data from the GitHub repository using the GitHub API. The file’s name changes according to the last update, having a pattern beginning with the prefix “outbreaks_”, followed by the date of last update. For instance, for the version updated on September 30, 2024, the corresponding .csv file is outbreaks_30092024.csv.
# URL to get the latest outbreak file
url_api <- "https://api.github.com/repos/jatorresmunguia/disease_outbreak_news/contents/Last%20update"
# Automatically detect the file ending with prefix 'outbreaks'
last_file <- fromJSON(content(GET(url_api), as = "text"))$name[grepl(fromJSON(content(GET(url_api), as = "text"))$name, pattern = paste0("^outbreaks"))]
# Filter to find the CSV file
rdata_file <- last_file[grepl(".csv$", last_file)]
file_name <- basename(rdata_file)
# Load the outbreak data from the CSV file
outbreaks <- read.csv(paste0("https://raw.githubusercontent.com/jatorresmunguia/disease_outbreak_news/refs/heads/main/Last%20update", "/", rdata_file), row.names = 1, header = TRUE)Step 3: Load geospatial data
To produce the choropleth maps, it is required to get the country administrative boundaries from a geospatial object. In this tutorial, the shapefile from Société OPENDATASOFT is used.
# URL for the ZIP file containing the shapefile (administrative boundaries)
url <- "https://public.opendatasoft.com/api/explore/v2.1/catalog/datasets/world-administrative-boundaries/exports/shp?lang=en&timezone=America%2FGuatemala"
# Create temporary file for the ZIP
temp_zip <- tempfile(fileext = ".zip")
# Create a temporary directory to unzip the files
temp_dir <- tempdir()
# Download the ZIP file
download.file(url, temp_zip, mode = "wb")
# Unzip the file
unzip(temp_zip, exdir = temp_dir)
# Find the .shp file
shp_file <- list.files(temp_dir, pattern = "\\.shp$", full.names = TRUE)
# Load the shapefile into R
shpsf <- st_read(shp_file, quiet = TRUE)
# Cleaning the shapefile's ISO3 country codes
shpsf[!is.na(shpsf$iso3) & shpsf$iso3 == "IMY", "iso3"] <- "IMN"
shpsf[!is.na(shpsf$name) & shpsf$name == "Jersey", "iso3"] <- "JEY"Step 4: Reshape the data
Here, we summarize the outbreaks data by iso3 code, year, and disease, and calculate the total number of outbreaks per country, and reshape to long format.
data_base <- outbreaks |>
# Group the data
group_by(iso3, Year, icd104n) |>
# Summarize the number of outbreaks
summarise(count = n(), .groups = 'drop') |>
# Reshape to wide format
pivot_wider(names_from = icd104n, values_from = count, values_fill = 0)
data_base <- data_base |>
# Replace missing values (NA) with 0
mutate(across(-c(iso3, Year), ~replace(., is.na(.), 0))) |>
# Calculate the total outbreaks per country
mutate(`All diseases` = rowSums(across(-c(iso3, Year)))) |>
# Reshape to long format
pivot_longer(!c(iso3, Year), names_to = "Disease", values_to = "outbreaks") Step 5: Add the geographic attributes to the outbreaks data
After loading the shapefile containing the administrative boundaries for countries, we need to integrate this geographic information into the disease outbreaks data. To do this, we have to merge the data_base object with the shpsf using iso3 country codes as key variable.
outbreaks_all <- data_base |>
# Filter the data to include only the "All diseases" category, which represents the total number of outbreaks per country.
filter(Disease == "All diseases") |>
# Group the data by country code (iso3) and summarize the total number of outbreaks for each country.
group_by(iso3) |>
summarise(outbreaks = sum(outbreaks)) |>
# Perform a right join with the spatial data (`shpsf`) to combine the outbreak data with geographical boundaries.
right_join(shpsf, by = "iso3") |>
# Convert the resulting data into a spatial data frame using `st_as_sf()` to prepare for mapping.
st_as_sf()Note: You can also use filter(between(Year, left = , right = )) to exclusively include a subset of observations by specifying a date range. For example, setting left = 2010 and right = 2020 would filter the data to only include outbreaks that occurred between 2010 and 2020. This is useful when you want to focus on a specific time period within your dataset.
Step 6: Making the interactive map
To create the interactive choropleth map using leaflet library, we first set up a palette to categorize the number of outbreaks into bins, which is essential for visual differentiation on the choropleth map.
# Define the color palette with bins for categorizing outbreak numbers
mybins <- c(0, 10, 20, 30, 40, 50, 60, 70)
mypalette <- colorBin(palette = "PuRd",
domain = outbreaks_all$outbreaks,
na.color = "transparent", bins = mybins)Then, we create labels to display the country name and the number of outbreaks when hovering over the choropleth map.
# Create labels for each country
mytext <- paste0("<b>", outbreaks_all$name, "</b>", "<br/>",
outbreaks_all$outbreaks, " ", "outbreaks"
) |>
lapply(htmltools::HTML)Add a title for the map
# Add map title
tag.map.title <- tags$style(HTML("
.leaflet-control.map-title {
left: 50%;
transform: translateX(-50%);
text-align: left;
padding-left: 10px;
padding-right: 10px;
background: rgba(255,255,255,0.75);
font-weight: bold;
font-size: 20px;
color: black;
}
"))
# Create the map
title <- tags$div(
tag.map.title, HTML("Geographic distribution of disease outbreaks, Jan 1996-Sep 2024")
)Finally, we set up the Leaflet map with tiles, polygons, labels, a legend, and a title.
leaflet(outbreaks_all, options = leafletOptions(zoomControl = FALSE)) |>
addTiles() |>
addProviderTiles(providers$Esri.WorldTopoMap) |>
setView(lat = 10, lng = 0, zoom = 2) |>
# Add polygons for countries and their outbreak data
addPolygons(
fillColor = ~mypalette(outbreaks),
stroke = TRUE,
fillOpacity = 0.9,
color = "white",
weight = 0.3,
label = mytext,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "13px",
direction = "auto"
)
) |>
# Add a legend to explain the color scale
addLegend(
pal = mypalette, values = ~outbreaks, opacity = 0.9,
title = "Number of outbreaks:",
position = "bottomleft"
) |>
# Add the title to the map
addControl(title, position = "topleft", className = "map-title") |>
# Re-add the zoom control to the top-right corner
onRender("function(el, x) {
L.control.zoom({position:'topright'}).addTo(this);
}")Note: You can add card(full_screen = TRUE) from bslib library to add an icon to expand the card’s size to the browser window.